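/* This program creates the core LBD data, 1977-1997.

   It first builds an establishment-level size-at-entry file (sae_lbdnum.dta),
   then collapses each annual LBD file to county-industry (fips-sic3) cells and
   stacks the years into lbd_1977_1997_fips_sic3.dta.
*/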


* Close any log left open by an earlier (possibly aborted) run. Without this,
* -log using- stops with a "log file already open" error.
capture log close
log using "mk_lbd_mylog.log", replace

* Directory globals used to build file paths below (rawlbd and inter are the
* ones referenced in this file). Each is defined here with no value, so a
* reference such as ${rawlbd} expands to nothing and files are looked up in
* the current working directory.
* NOTE(review): presumably each should be set to a directory path that ends
* in a path separator before running - confirm.
global path
global programs
global logs
global rawlbd
global imported
global inter
global output
global tables
global graphs


****************************************
*** PREP LBD Files for SIZE AT ENTRY ***
****************************************

* Step 1: for every year, save a copy of the raw LBD file that keeps only the
* variables needed for size at entry. File names are quoted so that directory
* globals containing spaces do not break the commands.
local sae_inputs yr emp pay mu firmid lbdnum firstyear lastyear

forvalues t = 1977/1997 {
    use "${rawlbd}lbd`t'.dta", clear
    keep `sae_inputs'
    save "${inter}saelbd`t'.dta", replace
}

* Step 2: the 1997 extract is still in memory after the loop above. Append the
* 1977-1996 extracts to it, deleting each intermediate file once it is read.
forvalues t = 1977/1996 {
    append using "${inter}saelbd`t'.dta"
    erase "${inter}saelbd`t'.dta"
}
erase "${inter}saelbd1997.dta"

rename yr year

* Discard records with no firm identifier, and records where both employment
* and payroll are missing.
drop if firmid=="" | (emp==. & pay==.)

* An establishment reporting zero employment in its first year is treated as
* having size 1.
replace emp = 1 if emp==0 & year==firstyear

* Build the two at-entry measures the same way:
*   sae  = employment (emp) at entry
*   muae = mu at entry
* The value is taken from the first-year record. For establishments that
* entered before 1977 the 1977 record is the best available substitute.
* The value is then spread to every record of the same lbdnum.
foreach pair in "sae emp" "muae mu" {
    local atentry : word 1 of `pair'
    local current : word 2 of `pair'

    gen `atentry' = `current' if year==firstyear | (year==1977 & firstyear<1977)
    egen temp = max(`atentry'), by(lbdnum)
    replace `atentry' = temp if `atentry'==. & temp!=.
    drop temp
}


* Reduce to one row per distinct (lbdnum, firstyear, sae, muae) combination.
contract lbdnum firstyear sae muae
drop _freq

* dup counts the OTHER rows that share the same lbdnum: 0 means the lbdnum is
* unique, 1 means it appears twice, 2 means three times, and so on.
duplicates tag lbdnum, gen(dup)
tab dup, miss

* Earliest first year reported for each lbdnum. It is computed over all rows,
* so an lbdnum with three or more distinct rows (dup of 2 or more) also gets a
* value. Conditioning on dup==1 left those missing. For a unique lbdnum this
* is simply its own firstyear.
egen saefirstyear = min(firstyear), by(lbdnum)

* Diagnostic only: spread between the latest and earliest reported first year
* among lbdnums that appear more than once.
egen maxfirstyear = max(firstyear) if dup>0, by(lbdnum)
gen fyrdiff = maxfirstyear-saefirstyear if dup>0
tab fyrdiff, miss
drop dup

* One row per lbdnum. maxfirstyear and fyrdiff are not kept.
collapse (min) saefirstyear sae muae, by(lbdnum)

* Check that no lbdnum is repeated after the collapse.
duplicates tag lbdnum, gen(dup)
tab dup, miss
drop dup

sort lbdnum
save "${inter}sae_lbdnum.dta", replace

******************************
*** END SIZE AT ENTRY PREP ***
******************************


* Build one county-industry (fips-sic3) file per year, 1977-1997.
* Each pass loads the raw annual file, applies the sample restrictions,
* attaches size at entry from sae_lbdnum.dta, builds establishment-level
* indicators and employment splits, and collapses to fips-sic3-year cells.
* The 1997 result is still in memory when the loop ends.
forvalues year = 1977/1997 {

    use "${rawlbd}lbd`year'.dta", clear

    rename yr year

    * Keep records whose act code is blank or A, then drop flaga codes D and I.
    keep if inlist(act, "", "A")
    drop if inlist(flaga, "D", "I")

    * Geography: require a 2-character state code and a 3-character county
    * code. The tabulations are diagnostics for the log.
    gen lenst = length(state)
    tab lenst, miss
    drop if lenst!=2
    drop lenst
    gen lencty = length(county)
    tab lencty, miss
    drop if lencty!=3
    drop lencty

    * fips is state followed by county. Drop codes that are not numeric.
    egen fips = concat(state county)
    destring fips, gen(numfips) force
    drop if numfips==.

    * NOTE(review): group() numbers the flagb values present in this year in
    * sorted order, so the labels below are right only if all three codes
    * occur and sort as birth, continuer, death - confirm. firmbcd is not
    * carried through the collapse further down.
    sort flagb
    egen firmbcd = group(flagb)
    label define firmbcd 1 "birth" 2 "continuer" 3 "death"
    label values firmbcd firmbcd

    * clean industry codes (need sic3)
    * Fall back to bestsic when sic is blank or all zeros. An all-zero code
    * that survives the fallback is blanked.
    replace sic = bestsic if (sic=="" | sic=="000000")
    replace sic = "" if sic=="000000"
    gen sic3 = substr(sic,1,3)
    destring sic3, gen(numsic3) force

    * Report the blank-sic share here, while those rows still exist. The
    * numsic3 drop just below removes every blank sic3, so counting after it
    * would always show zero.
    count if sic3==""
    di "fraction of obs with blank sic"
    disp r(N)/_N

    * Drop non-numeric (including blank) industry codes, then the excluded
    * three-digit industries.
    drop if numsic3==.
    foreach code in 000 00 012 014 020 022 036 037 070 073 079 080 082 084 086 090 099 {
        drop if sic3=="`code'"
    }

    di "total estabs in data year `year'"
    count

    * Keep establishments with strictly positive, non-missing employment.
    drop if emp==. | emp<=0

    * payroll per worker
    gen ppw = pay/emp

    * Attach size at entry (sae) and mu at entry (muae). Keep matched
    * establishments with both measures present.
    merge m:1 lbdnum using "${inter}sae_lbdnum.dta"
    keep if _merge==3
    drop _merge
    count if sae==.
    drop if sae==.
    count if muae==.
    drop if muae==.

    * Entrants: first year 1981 or later, split by muae equal to 1 or 0.
    gen entrant = (firstyear>=1981 & firstyear!=.)
    gen muentrant = (entrant==1 & muae==1)
    gen suentrant = (entrant==1 & muae==0)

    gen entemp = emp if entrant==1
    gen muentemp = emp if entrant==1 & muae==1
    gen suentemp = emp if entrant==1 & muae==0

    * Incumbents: first year 1980 or earlier.
    * NOTE(review): a missing lastyear compares as larger than any number, so
    * such rows count as survivors in the two surv variables - confirm that
    * this is intended.
    gen incumbent = (firstyear<=1980)
    gen incemp = emp if incumbent==1
    gen muincemp = emp if incumbent==1 & muae==1
    gen suincemp = emp if incumbent==1 & muae==0
    gen survincemp97 = emp if incumbent==1 & lastyear>=1997
    gen dyincemp97 = emp if incumbent==1 & lastyear<1997
    gen survincemp = emp if incumbent==1 & lastyear>year
    gen dyincemp = emp if incumbent==1 & lastyear<=year

    * Current-year entry and exit.
    gen cyentrant = (year==firstyear)
    gen cymuentrant = (cyentrant==1 & muae==1)
    gen cysuentrant = (cyentrant==1 & muae==0)
    gen cyexit = (year==lastyear)

    gen cyentemp = emp if cyentrant==1
    gen cymuentemp = emp if cyentrant==1 & muae==1
    gen cysuentemp = emp if cyentrant==1 & muae==0

    * estab size at entry cats: 1-25, 26-100, 101-1000, over 1000.
    * NOTE(review): rows with sae below 1 get no class and so are left out of
    * every es total below - confirm that this is intended.
    gen es = 1 if sae>=1 & sae<=25
    replace es = 2 if sae>25 & sae<=100
    replace es = 3 if sae>100 & sae<=1000
    replace es = 4 if sae>1000 & sae!=.

    * Employment by size-at-entry class. The collapse below picks these up
    * with a variable range, so the creation order must not change.
    foreach num in 1 2 3 4 {
        gen es`num'emp = emp if es==`num'
        gen es`num'entemp = emp if es==`num' & entrant==1
        gen es`num'muentemp = emp if es==`num' & entrant==1 & muae==1
        gen es`num'suentemp = emp if es==`num' & entrant==1 & muae==0
        gen es`num'incemp = emp if es==`num' & incumbent==1
        gen es`num'muincemp = emp if es==`num' & incumbent==1 & muae==1
        gen es`num'suincemp = emp if es==`num' & incumbent==1 & muae==0
    }

    * Entrant flags by size-at-entry class, also collapsed through a range.
    foreach num in 1 2 3 4 {
        gen es`num'entrant = 1 if es==`num' & entrant==1
    }

    gen estabcounter = 1 if lbdnum!=""

    * Employment-weighted cell means, saved to a side file and merged back
    * after the main collapse with an ew prefix.
    preserve
        collapse (mean) ppw avgestsize=emp entrate=entrant [aw=emp], by(fips numfips sic3 numsic3 year)
        foreach var in ppw avgestsize entrate {
            rename `var' ew`var'
        }
        save "${inter}ewlbd`year'.dta", replace
    restore

    * collapse to county-industry-year level (fips-sic3-year)
    collapse (sum) emp pay entemp muentemp suentemp incemp muincemp suincemp ///
                   cyentemp cymuentemp cysuentemp ///
                   entrants=entrant cyentrants=cyentrant cyexits=cyexit ///
                   es1emp-es4suincemp es1entrant-es4entrant survincemp97-dyincemp ///
             (mean) ppw avgestsize=emp avgentestsize=cyentemp entrate=entrant ///
             (count) numestabs=estabcounter, ///
             by(fips numfips sic3 numsic3 year)

    merge 1:1 fips sic3 year using "${inter}ewlbd`year'.dta"
    drop if _merge==2
    drop _merge

    save "${inter}lbd`year'_fips_sic3.dta", replace

}



* Stack the annual county-industry files into a single 1977-1997 panel.
* The 1997 file is loaded explicitly instead of assuming it is still in memory
* from the loop above, so this step gives the same result when run on its own.
use "${inter}lbd1997_fips_sic3.dta", clear
forvalues t = 1977/1996 {
    append using "${inter}lbd`t'_fips_sic3.dta"
}


save "${inter}lbd_1977_1997_fips_sic3.dta", replace



log close


